Code
library(tidyverse)
library(tidymodels)
Tony Duan
January 1, 2024





<Training/Testing/Total>
<699/233/932>
Rows: 699
Columns: 9
$ city <fct> SACRAMENTO, LINCOLN, SACRAMENTO, ELK_GROVE, SACRAMENTO, SACR…
$ zip <fct> z95842, z95648, z95835, z95758, z95831, z95864, z95624, z956…
$ beds <int> 4, 5, 3, 3, 2, 3, 4, 2, 3, 4, 2, 3, 3, 4, 3, 2, 3, 3, 5, 3, …
$ baths <dbl> 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 2.0, 2.0, 4.0, 1.0, 2.0, …
$ sqft <int> 1292, 3072, 1120, 1273, 1324, 1643, 3992, 1315, 1262, 2213, …
$ type <fct> Residential, Residential, Residential, Residential, Resident…
$ price <int> 105000, 315000, 209000, 190000, 234500, 99000, 460000, 23000…
$ latitude <dbl> 38.67960, 38.86641, 38.68195, 38.43124, 38.48797, 38.58867, …
$ longitude <dbl> -121.3560, -121.3085, -121.5050, -121.4400, -121.5302, -121.…
Rows: 233
Columns: 9
$ city <fct> SACRAMENTO, SACRAMENTO, SACRAMENTO, SACRAMENTO, ELK_GROVE, S…
$ zip <fct> z95823, z95841, z95823, z95827, z95758, z95828, z95660, z958…
$ beds <int> 3, 3, 4, 3, 2, 4, 4, 4, 3, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3, 4, …
$ baths <dbl> 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 1, 2, 2, 2, 3, 2, …
$ sqft <int> 1167, 1122, 1329, 1380, 1039, 1146, 1587, 1590, 1463, 1406, …
$ type <fct> Residential, Condo, Residential, Residential, Condo, Residen…
$ price <int> 68212, 89921, 122682, 136500, 141000, 149593, 161500, 173000…
$ latitude <dbl> 38.47890, 38.66260, 38.46817, 38.56666, 38.42325, 38.49857, …
$ longitude <dbl> -121.4310, -121.3278, -121.4441, -121.3326, -121.4445, -121.…




# 10-fold cross-validation using stratification
# A tibble: 10 × 2
splits id
<list> <chr>
1 <split [627/72]> Fold01
2 <split [627/72]> Fold02
3 <split [627/72]> Fold03
4 <split [628/71]> Fold04
5 <split [628/71]> Fold05
6 <split [630/69]> Fold06
7 <split [631/68]> Fold07
8 <split [631/68]> Fold08
9 <split [631/68]> Fold09
10 <split [631/68]> Fold10
sample with replacement

# Bootstrap sampling using stratification
# A tibble: 25 × 2
splits id
<list> <chr>
1 <split [699/255]> Bootstrap01
2 <split [699/263]> Bootstrap02
3 <split [699/255]> Bootstrap03
4 <split [699/258]> Bootstrap04
5 <split [699/254]> Bootstrap05
6 <split [699/249]> Bootstrap06
7 <split [699/258]> Bootstrap07
8 <split [699/259]> Bootstrap08
9 <split [699/268]> Bootstrap09
10 <split [699/256]> Bootstrap10
# ℹ 15 more rows
feature engineering





[YouTube] 3 Reasons to Use Tidymodels with Julia Silge
https://www.youtube.com/watch?v=sv5r7CVAVwo
---
title: "Tidymodels introduction"
subtitle: "3 Reasons to Use Tidymodels"
author: "Tony Duan"
date: "2024-01-01"
categories: [analysis]
execute:
  warning: false
  error: false
format:
  html:
    toc: true
    code-fold: show
    code-tools: true
    number-sections: true
    code-block-bg: true
    code-block-border-left: "#31BAE9"
---
```{r}
library(tidyverse)
library(tidymodels)
```
# model type
{width="900"}
{width="900"}
# specify model
{width="900"}
{width="900"}
{width="900"}
# Data splitting with rsample
```{r}
# Split the Sacramento housing data into training and testing sets,
# putting 75% of the rows in the training set.
# Seed the RNG first so the random split is the same on every render,
# matching the seed used by the resampling chunks in this document.
set.seed(123)
home_split <- initial_split(Sacramento, prop = 0.75)
# Print the split summary (<Training/Testing/Total> row counts).
home_split
```
```{r}
# Extract the training portion of the split and preview its columns.
# Uses `<-` for assignment, consistent with the `home_split` chunk.
home_train <- training(home_split)
glimpse(home_train)
```
```{r}
# Extract the held-out testing portion of the split and preview its columns.
# Uses `<-` for assignment, consistent with the `home_split` chunk.
home_test <- testing(home_split)
glimpse(home_test)
```
{width="900"}
{width="900"}
{width="900"}
## Cross-validation
{width="900"}
```{r}
# Fix the RNG seed so the fold assignment is reproducible.
set.seed(123)
# Build 10-fold cross-validation resamples of the training set,
# stratified on the outcome `price` (v = 10 is the rsample default,
# written out here for clarity).
vfold_cv(
  data = home_train,
  v = 10,
  strata = price
)
```
## Bootstrapping
sample with replacement
{width="900"}
```{r}
# Fix the RNG seed so the bootstrap draws are reproducible.
set.seed(123)
# Draw 25 bootstrap resamples (sampling with replacement) from the
# training set, stratified on the outcome `price` (times = 25 is the
# rsample default, written out here for clarity).
bootstraps(
  data = home_train,
  times = 25,
  strata = price
)
```
# recipe
feature engineering
{width="900"}
{width="900"}
{width="900"}
{width="900"}
# workflow and model
{width="900"}
# Reference
[3 Reasons to Use Tidymodels with Julia Silge (YouTube)](https://www.youtube.com/watch?v=sv5r7CVAVwo)